library(tidycensus)
library(tidyverse)
## -- Attaching packages -------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.2.1     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.4
## √ tidyr   1.0.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0
## -- Conflicts ----------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(readxl)
library(ggrepel)
library(mapview)
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
## 
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
## 
##     plot
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
options(tigris_use_cache = TRUE)

Testing this out

# Variable catalogue for the 2010 decennial "sf1" dataset (cached locally);
# joined further down to attach labels to variable codes.
v10d_sf1 <- load_variables(
  year = 2010,
  dataset = "sf1",
  cache = TRUE
)

# Trial pull: variable P012001 at block level for King County, WA, with
# geometry attached.
test <- get_decennial(
  geography = "block",
  variables = "P012001",
  year = 2010,
  state = "WA",
  county = "King",
  geometry = TRUE
)
## Getting data from the 2010 decennial Census

Folks in Seattle

Set up the table ids

# Variable codes P012003 through P012025 (numeric suffix zero-padded to
# three digits).
guyvars <- sprintf("P012%03d", 3:25)

# galvars -- placeholder, not defined yet

# Candidate summary variables.
guy_sumvars <- c("P012001", "P012002")

# gal_sumvars -- placeholder, not defined yet

Get the data

# Tract-level values for every code in `guyvars`, King County, WA (2010
# decennial), with P012001 attached as the summary variable and tract
# geometry included.
king_guys <- get_decennial(
  geography = "tract",
  variables = guyvars,
  summary_var = "P012001",
  year = 2010,
  state = "WA",
  county = "King",
  geometry = TRUE
)
## Getting data from the 2010 decennial Census

Clean that data

# Attach the variable labels, split each label on "!!" into count type,
# sex and age bracket, then derive numeric bounds for the bracket.
king_guys2 <- king_guys %>%
  left_join(
    v10d_sf1 %>% select(-concept),
    by = c("variable" = "name")
  ) %>%
  separate(label, into = c("count_type", "sex", "age"), sep = "!!") %>%
  mutate(
    # rewrite the open-ended brackets so every bracket reads "<lo> to <hi>"
    age = age %>%
      str_replace("Under", "0 to") %>%
      str_replace("years and over", "to 100 years"),
    # first number in the bracket text
    min_age = age %>%
      str_extract("\\d+") %>%
      as.integer(),
    # the number directly before the trailing "years"
    max_age = age %>%
      str_extract("\\d+ years$") %>%
      str_extract("\\d+") %>%
      as.integer()
  ) %>%
  clean_names()

Quick peek

# One bar panel per age bracket, one bar per census tract.
king_guys2 %>%
  mutate(census_tract = str_extract(name, "\\d+\\.?\\d?\\d?")) %>%
  ggplot(aes(x = census_tract, y = value)) +
  geom_col() +
  # `age` is character, so a plain `~age` facet sorts the panels
  # alphabetically ("10 to 14 years" ahead of "5 to 9 years"); order the
  # panels by the numeric lower bound of each bracket instead.
  facet_wrap(~ fct_reorder(age, min_age), nrow = 1) +
  coord_flip()

# No `plot` argument: ggsave() writes the most recently created plot.
ggsave("king_county_dudes_by_age.pdf", width = 24, height = 36)

EDA

Median Age

# Variable code used for median age, both sexes.
median_age_both_sex <- "P013001"

# Tract-level pull of that variable (renamed to `median_age`) for
# King County, WA, 2010 decennial, with tract geometry.
king_mage <- get_decennial(
  geography = "tract",
  variables = c(median_age = median_age_both_sex),
  year = 2010,
  state = "WA",
  county = "King",
  geometry = TRUE
)
## Getting data from the 2010 decennial Census
# Tidy the column names and split the "tract, county, state" name field
# into separate columns.
# Plain `<-` is used instead of `%<>%`: that operator lives in magrittr,
# which this file never attaches.
king_mage <- king_mage %>%
  clean_names() %>%
  # split on ", " so county and state do not keep a leading space
  separate(name, into = c("tract", "county", "state"), sep = ", ") %>%
  mutate(
    # keep only the trailing tract number, e.g. "12" or "4.01"
    tract = str_extract(tract, "\\d+\\.?\\d?\\d?$"),
    county = str_remove(county, " County")
  )

Figuring out what census tracts belong to Seattle

# Seattle census-block / neighborhood correlation workbook.
seattle_data <- read_excel(
  "SeattleCensusBlocksandNeighborhoodCorrelationFile.xlsx"
)

# Keep the neighborhood columns and rewrite tract_10 as text in the same
# form as the `tract` column built from the census names: insert a decimal
# point before the last two digits, then drop a ".00".
seattle_tracts <- seattle_data %>%
  clean_names() %>%
  transmute(
    tract = tract_10 %>%
      as.character() %>%
      str_replace("(\\d+)(\\d\\d)$", "\\1\\.\\2") %>%
      str_replace("(\\d+)\\.00", "\\1"),
    urban_village_name,
    cra_name,
    neighborhoods_included,
    neighborhood_district_name
  ) %>%
  filter(tract != "9901") # this is water around vashon island

Combine tables

# Keep only the King County tracts that also appear in seattle_tracts.
seattle_mage <- semi_join(king_mage, seattle_tracts, by = "tract")

# Dot plot of value per tract, tracts ordered by value.
ggplot(seattle_mage, aes(x = fct_reorder(tract, value), y = value)) +
  geom_point() +
  coord_flip()

# Histogram of value in bins of width 2.
ggplot(seattle_mage, aes(x = value)) +
  geom_histogram(binwidth = 2)

# One row per distinct (tract, cra_name) pair; the count itself is not
# needed, only the collapsed pairs.
sea_tracts_flat <- seattle_tracts %>%
  count(tract, cra_name) %>%
  select(-n)

# Attach cra_name to each King County tract, then drop the tracts that
# found no match.
sea_neighs_mage <- left_join(king_mage, sea_tracts_flat, by = "tract") %>%
  filter(!is.na(cra_name))

# Dot plot of value per "tract: cra_name", ordered by value, with tracts
# 86, 87 and 90 drawn in a different colour (legend hidden).
sea_neighs_mage %>%
  ggplot(aes(
    x = fct_reorder(paste0(tract, ": ", cra_name), value),
    y = value,
    # `tract` is character, so compare against strings rather than relying
    # on implicit number-to-text coercion; `%in%` already yields TRUE/FALSE,
    # so no if_else() wrapper is needed.
    col = tract %in% c("86", "87", "90")
  )) +
  geom_point() +
  coord_flip() +
  theme(legend.position = "none")

# Dot plot with cra_name written beside each point; the horizontal
# justification flips for points above the median value.
ggplot(
  sea_neighs_mage,
  aes(x = fct_reorder(tract, value), y = value, label = cra_name)
) +
  geom_point() +
  geom_text(
    aes(hjust = if_else(value > median(value), 1.1, -0.1))
  ) +
  coord_flip()

# Same plot, letting ggrepel place the labels.
ggplot(
  sea_neighs_mage,
  aes(x = fct_reorder(tract, value), y = value, label = cra_name)
) +
  geom_point() +
  geom_text_repel() +
  coord_flip()

# Static choropleth: tract polygons filled by value.
ggplot(sea_neighs_mage, aes(fill = value)) +
  geom_sf() +
  scale_fill_viridis_c()

# Interactive map coloured by the same column.
mapview(sea_neighs_mage, legend = TRUE, zcol = "value")

Trying the above with different shape files

# Variable code used for median age, both sexes (same value as assigned
# earlier in this file).
median_age_both_sex <- "P013001"

# Same tract-level pull as king_mage, additionally keeping the extra
# columns that come with the geometry (keep_geo_vars = TRUE).
king_mage2 <- get_decennial(
  geography = "tract",
  variables = c(median_age = median_age_both_sex),
  year = 2010,
  state = "WA",
  county = "King",
  geometry = TRUE,
  keep_geo_vars = TRUE
)
## Getting data from the 2010 decennial Census

Trying with the water to make it look nice.

# Water features for King County, WA, as an sf object.
king_water <- area_water("WA", "King", class = "sf")

# Cut the water out of the tract polygons: merge all water geometries into
# a single one, then subtract it from every tract.
king2_erased <- st_difference(
  king_mage2,
  king_water %>%
    st_combine() %>%
    st_union(by_feature = TRUE)
)
## although coordinates are longitude/latitude, st_difference assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Tidy the water-erased tracts: drop the extra geography columns, keep
# name_y as `name`, and split it into tract / county / state.
king_mage2_clean <- king2_erased %>%
  clean_names() %>%
  select(
    -geo_id, -state, -county, -tract,
    name = name_y,
    -name_x, -lsad, -countyfp, -statefp, -geoid
  ) %>%
  # split on ", " so county and state do not keep a leading space
  separate(name, into = c("tract", "county", "state"), sep = ", ") %>%
  mutate(
    # keep only the trailing tract number, e.g. "12" or "4.01"
    tract = str_extract(tract, "\\d+\\.?\\d?\\d?$"),
    county = str_remove(county, " County")
  )

# Attach cra_name to the water-erased tracts and keep only matched tracts.
sea_neighs_mage2 <- left_join(
  king_mage2_clean,
  sea_tracts_flat,
  by = "tract"
) %>%
  filter(!is.na(cra_name))

# Choropleth of the matched tracts, filled by value.
ggplot(sea_neighs_mage2, aes(fill = value)) +
  geom_sf() +
  scale_fill_viridis_c() +
  theme_bw() +
  labs(fill = "age")

# Same choropleth for every King County tract.
ggplot(king_mage2_clean, aes(fill = value)) +
  geom_sf() +
  scale_fill_viridis_c() +
  theme_bw() +
  labs(fill = "age")

# Interactive versions of both maps.
mapview(sea_neighs_mage2, legend = TRUE, zcol = "value")
mapview(king_mage2_clean, legend = TRUE, zcol = "value")